#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright: Larne Pekowsky <larnep@gmail.com>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
#
# Strip Heisig keywords out of an MCD batch created by MCDGenerator
# See http://ichi2.net/anki/wiki/JapaneseSupport
#

import sys
import codecs
from optparse import OptionParser

def parse_command_line():
    """Parse the arguments given on the command line.

    Two optional flags are recognised: -i/--input-file and -o/--output-file.

    Returns:
        The (options, others) pair produced by OptionParser.parse_args():
        ``options.input_file`` and ``options.output_file`` hold the flag
        values (None when a flag is absent), and ``others`` is the list of
        remaining positional arguments.
    """
    cli = OptionParser(
        version="Name: %prog%\n",
        usage="%prog: <args>\n",
        description="Strip Heisig keywords out of an MCD batch created by MCDGenerator\n")

    # (short flag, long flag, placeholder shown in --help, help text)
    option_specs = (
        ('-i', '--input-file', 'infile', 'File containing text to process'),
        ('-o', '--output-file', 'outfile', 'File to write'),
    )
    for short_flag, long_flag, placeholder, help_text in option_specs:
        cli.add_option(short_flag, long_flag, metavar=placeholder, help=help_text)

    return cli.parse_args()

def strip_keywords(question):
    """Replace the contents of every [...] group in *question* with '...'.

    The brackets themselves are kept, so u'ab[x]c[y]d' becomes
    u'ab[...]c[...]d'.  A '[' with no following ']' is left untouched,
    together with everything after it.

    Args:
        question: the text of the question field.

    Returns:
        The text with each bracketed keyword replaced by '...'.
    """
    pieces = []
    pos = 0  # start of the part of `question` not yet copied to `pieces`

    while True:
        start = question.find('[', pos)
        if start == -1:
            break

        end = question.find(']', start)
        if end == -1:
            # Unmatched '[': nothing to replace, keep the remainder verbatim.
            break

        # Copy everything up to and including the '[', then the placeholder.
        pieces.append(question[pos:start + 1])
        pieces.append(u'...')
        # Resume at the ']' so that it is copied along with the next piece.
        pos = end

    pieces.append(question[pos:])
    return u''.join(pieces)


def main():
    """Read tab-separated UTF-8 lines, strip keywords from field 0, write them out.

    Input comes from --input-file, else from the first positional argument,
    else from standard input.  Output goes to --output-file, else to standard
    output.  Files opened here are closed before returning; the standard
    streams are left open.
    """
    options, others = parse_command_line()

    owned = []  # streams opened here (not stdin/stdout) that must be closed
    try:
        if options.input_file:
            infile = open(options.input_file, 'rb')
            owned.append(infile)
        elif others:
            infile = open(others[0], 'rb')
            owned.append(infile)
        else:
            # The codecs reader needs bytes.  Python 3 exposes the byte
            # stream as `.buffer`; Python 2 streams are already bytes.
            infile = getattr(sys.stdin, 'buffer', sys.stdin)

        if options.output_file:
            outfile = open(options.output_file, 'wb')
            owned.append(outfile)
        else:
            outfile = getattr(sys.stdout, 'buffer', sys.stdout)

        # Decode/encode once at the I/O boundary; work on text in between.
        instream = codecs.getreader("utf-8")(infile)
        outstream = codecs.getwriter("utf-8")(outfile)

        for line in instream:
            fields = line.strip().split(u"\t")
            fields[0] = strip_keywords(fields[0])
            outstream.write(u"\t".join(fields) + u"\n")

        outfile.flush()
    finally:
        for stream in owned:
            stream.close()


if __name__ == '__main__':
    main()

